import json

import requests
from lxml import etree


def get_province_city(url, timeout=10):
    """
    Download a page and extract each province together with its cities.

    :param url: address of the page that lists provinces and their cities
    :param timeout: seconds to wait for the server, forwarded to
        ``requests.get``
    :return: list of JSON strings, one per province, each of the form
        ``{"province": <name>, "city": [<city name>, ...]}``; an empty list
        when the response status is not 200
    """
    # Without an explicit timeout the request can block indefinitely on an
    # unresponsive server.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return []

    root = etree.HTML(response.content.decode("utf-8"))
    if root is None:
        # Defensive: nothing parseable came back, so there is nothing to walk.
        return []

    ans = []
    for province in root.xpath("/html/body/div[2]/div[2]/div/div/ul/li"):
        # Province title lives at <li>/div[2]/div/div
        province_names = province.xpath("./div[2]/div/div/text()")
        if not province_names:
            # This <li> does not match the expected layout; skip it instead
            # of failing the whole scrape with an IndexError.
            continue

        # City links live at <li>/div[2]/div/ul/li/a
        city_names = []
        for city in province.xpath("./div[2]/div/ul/li"):
            link_text = city.xpath("./a/text()")
            if link_text:
                city_names.append(link_text[0])

        ans.append(
            json.dumps(
                {"province": province_names[0], "city": city_names},
                ensure_ascii=False,
            )
        )
    return ans


def main():
    """Scrape the Lianjia city index page and print the resulting list."""
    provinces = get_province_city("https://www.lianjia.com/city/")
    print(provinces)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
